XXX This needs data not included in the git release to be run XXX
load('../RData/kemri_case_data.RData')
load('Outputs/Kenya_dat_PSM.RData')
dat_kenya = cbind(kemri_case_data, dat_kenya)
Summarise the top and bottom 20th percentiles
upper_p = round(quantile(dat_kenya$P_SMs, probs=.8),3)
print(upper_p)
## 80%
## 0.924
lower_p = round(quantile(dat_kenya$P_SMs, probs=.2),3)
print(lower_p)
## 20%
## 0.183
top_ind = dat_kenya$P_SMs>upper_p
bottom_ind = dat_kenya$P_SMs<lower_p
sum(top_ind)
## [1] 446
sum(bottom_ind)
## [1] 444
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% have sickle trait versus %s%% for patients with P(SM|Data)< %s',
upper_p,
round(100*mean(dat_kenya$HbAS[top_ind],na.rm = T),1),
round(100*mean(dat_kenya$HbAS[bottom_ind],na.rm = T),1),
lower_p))
## For patients with P(SM|Data)>0.924, 0.7% have sickle trait versus 4.8% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% died versus %s%% for patients with P(SM|Data)< %s',
upper_p,
round(100*mean(dat_kenya$died[top_ind],na.rm = T),1),
round(100*mean(dat_kenya$died[bottom_ind],na.rm = T),1),
lower_p))
## For patients with P(SM|Data)>0.924, 6.1% died versus 18.8% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% were blood culture + versus %s%% for patients with P(SM|Data)< %s',
upper_p,
round(100*mean(dat_kenya$bloodculture_pos[top_ind],na.rm = T),1),
round(100*mean(dat_kenya$bloodculture_pos[bottom_ind],na.rm = T),1),
lower_p))
## For patients with P(SM|Data)>0.924, 1.8% were blood culture + versus 4.7% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% has Hb<5 versus %s%% for patients with P(SM|Data)< %s',
upper_p,
round(100*mean(dat_kenya$severe_malaria_anaemia[top_ind],na.rm = T),1),
round(100*mean(dat_kenya$severe_malaria_anaemia[bottom_ind],na.rm = T),1),
lower_p))
## For patients with P(SM|Data)>0.924, 27.4% has Hb<5 versus 34.9% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% had fewer than 1,000 parasites per uL versus %s%% for patients with P(SM|Data)< %s',
upper_p,
round(100*mean(dat_kenya$log_parasites[top_ind]<3,na.rm = T),1),
round(100*mean(dat_kenya$log_parasites[bottom_ind]<3,na.rm = T),1),
lower_p))
## For patients with P(SM|Data)>0.924, 7.4% had fewer than 1,000 parasites per uL versus 13.7% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% had more than 100,000 parasites per uL versus %s%% for patients with P(SM|Data)< %s',
upper_p,
round(100*mean(dat_kenya$log_parasites[top_ind]>5,na.rm = T),1),
round(100*mean(dat_kenya$log_parasites[bottom_ind]>5,na.rm = T),1),
lower_p))
## For patients with P(SM|Data)>0.924, 57.4% had more than 100,000 parasites per uL versus 28.5% for patients with P(SM|Data)< 0.183
myx = dat_kenya$P_SMs
xs = seq(0,1,length.out = 100)[-1]
##******** BLOOD CULTURE + ****************
par(mfrow=c(1,1))
mod = gam(BCP ~ s(x,k=4), family='binomial',
data=data.frame(x=myx,
BCP=dat_kenya$bloodculture_pos))
summary(mod)
##
## Family: binomial
## Link function: logit
##
## Formula:
## BCP ~ s(x, k = 4)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.3651 0.1211 -27.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(x) 1.001 1.002 8.293 0.004 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.00338 Deviance explained = 1.25%
## UBRE = -0.69768 Scale est. = 1 n = 2220
preds = predict(mod, se.fit = T, newdata = data.frame(x=xs))
ys = 100*c(inv.logit(preds$fit+1.96*preds$se.fit),
rev(inv.logit(preds$fit-1.96*preds$se.fit)))
plot(xs, 100*inv.logit(preds$fit),type='l',lwd=3,
xlim = c(0,1),ylim = range(c(0,ys)),
xlab='P(Severe malaria | Data)',
ylab='Blood culture positive (%)')
polygon(c(xs, rev(xs)),ys, panel.first=grid(),
border = NA, col = adjustcolor('grey',.3))
mtext(text = '', side = 3, adj = 0, cex = 1.5)
abline(h = 100*mean(dat_kenya$bloodculture_pos),lty=2, lwd=2)
Explore anaemia a bit more
par(mfrow=c(2,2), las=1, bty='n')
layout(mat = matrix(data = c(1,1,2,3),nrow = 2, byrow = T))
plot(dat_kenya$P_SMs, dat_kenya$hb,
xlim=c(0,1),panel.first=grid(),
pch=20, xlab='P(Severe malaria | Data)',ylab='Haemoglobin (g/dL)')
abline(h=5,v=c(upper_p, lower_p), lty=2,lwd=3, col='red')
hist(dat_kenya$hb[bottom_ind], xlim=c(0,15),
breaks = 0:19, main = paste('P(Severe malaria | Data)',lower_p,sep='<'),
xlab='Hb (g/dL)')
abline(v=5, lty=2,lwd=3, col='red')
hist(dat_kenya$hb[top_ind], xlim=c(0,15),
breaks = 0:19, main = paste('P(Severe malaria | Data)',upper_p,sep='>'),
xlab='Hb (g/dL)')
abline(v=5, lty=2,lwd=3, col='red')
f=colorRampPalette(colors = RColorBrewer::brewer.pal(name = 'RdYlBu',n = 11))
n_levels = 11
mycuts = cut(x = dat_kenya$P_SMs, breaks = seq(0,1,by=1/n_levels))
cols_ind = as.numeric(mycuts)
mycols = f(n_levels)
par(las=1, mfrow=c(1,1), bty='n', family='serif',
cex.lab=1.5, cex.axis=1.5, mar = c(5,6,2,2))
my_fade = 0.6
## Sickle trait and thal homozygous
plot(log10(dat_kenya$platelet), log10(dat_kenya$wbc),
xaxt='n', yaxt='n', xlab='',
ylab='', pch=16, panel.first = grid(),
col = adjustcolor(mycols[cols_ind], my_fade))
mtext(side = 2, text = 'White count (x1000 per uL)',
cex=1.5, line = 3, las = 3)
mtext(side = 1, text = 'Platelet count (x1000 per uL)',
cex=1.5, line = 3)
axis(side = 1, at = 1:3, labels = 10^(1:3))
axis(side = 2, at = 0:2, labels = 10^(0:2))
axis(side = 1, at = log10(c(seq(10,100,by=10), seq(100,1000,by=100))),
tick = T, labels = F)
axis(side = 2, at = log10(c(seq(1,10,by=1), seq(10,100,by=10))),
tick = T, labels = F)
ind_double = dat_kenya$HbAS==1 & dat_kenya$thal==3
ind_single = dat_kenya$HbAS==1 & dat_kenya$thal<3
# points(log10(dat_kenya$platelet[ind_double]),
# log10(dat_kenya$wbc[ind_double]),
# col='black',pch=18,cex=1.8)
# legend('bottomright', pch = 18, col = c('black'),
# inset = 0, cex=1.3,bty='n',
# legend = c('HbAS & HZ-alpha-thal'))
lgd_ = rep(NA, n_levels)
lgd_[c(1,6,11)] = c(0,0.5,1)
legend('topleft',
legend = lgd_,
fill = mycols,bty='n',x.intersp =.5,
border = NA,inset = 0.03,
y.intersp = 0.5,title = 'P(SM | Data)\n',
cex = 1, text.font = 1)
## Bacteremia
plot(log10(dat_kenya$platelet), log10(dat_kenya$wbc),
xaxt='n', yaxt='n', xlab='',pch=16, ylab='',
col = adjustcolor(mycols[cols_ind], my_fade))
axis(side = 1, at = 1:3, labels = 10^(1:3))
axis(side = 2, at = 0:2, labels = 10^(0:2))
axis(side = 1, at = log10(c(seq(10,100,by=10), seq(100,1000,by=100))),
tick = T, labels = F)
axis(side = 2, at = log10(c(seq(1,10,by=1), seq(10,100,by=10))),
tick = T, labels = F)
mtext(side = 2, text = 'White count (x1000 per uL)',
cex=1.5, line = 3, las = 3)
mtext(side = 1, text = 'Platelet count (x1000 per uL)',
cex=1.5, line = 3)
ind_blood_cult = dat_kenya$bloodculture_pos==1
points(log10(dat_kenya$platelet[ind_blood_cult]),
log10(dat_kenya$wbc[ind_blood_cult]),
col='black',pch=18,cex=1.5)
ind_salmonella = grep(pattern = 'salmonella',
x = dat_kenya$bacteria,ignore.case = T)
points(log10(dat_kenya$platelet[ind_salmonella]),
log10(dat_kenya$wbc[ind_salmonella]),
col='black',pch=1,cex=1.5)
legend('bottomright', pch = c(18,1), col = c('black'),
inset = 0, cex=1.3, bty='n',
legend = c('Bacteremia','NTS'))
all(kemri_case_data$sample_code==dat_kenya$sample_code)
## [1] TRUE
par(mfrow=c(2,1))
ind1 = (kemri_case_data$severe_malaria_anaemia==1)
hist(dat_kenya$P_SMs[ind1], breaks = 40, xlab='P(SM)', main='SMA')
ind1 = (kemri_case_data$cerebral_malaria==1)
hist(dat_kenya$P_SMs[ind1], breaks = 40, xlab='P(SM)', main='CM')